!pip install -q -U keras-tuner
pip install tflearn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization,Dropout, Convolution2D,Input,Conv2D,MaxPooling2D,Flatten
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.initializers import GlorotUniform, GlorotNormal,HeNormal,HeUniform
from tensorflow.keras.regularizers import L2
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score
import tensorflow
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG19
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
import tkinter as tk
from tkinter import ttk,Tk
from google.colab.patches import cv2_imshow
from google.colab import files
from tensorflow.keras.models import load_model
from PIL import Image
import kerastuner as kt
from kerastuner import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters
# Mount Google Drive so dataset/model checkpoints can be read and written;
# force_remount refreshes any stale mount from a previous session.
from google.colab import drive
drive.mount('/content/drive',force_remount=True)
#Import dataset
# Oxford 17-category flowers dataset via TFLearn: 1360 RGB images, 224x224x3.
import tflearn.datasets.oxflower17 as oxflower17
(X,Y) = oxflower17.load_data()
#Dimension of data
print("Dimensions of data :", X.shape)
print("Dimensions of label :", Y.shape)
# The data contains 1360 rows of images of size 224*224*3 pixels, 224*224 being the size of each image and 3 being the channel size.
# Hold out 25% of the images for testing (random split, no fixed seed).
trainX,testX,trainY,testY = train_test_split(X,Y, test_size=0.25)
# Pixel-value ranges — sanity check on whether the loader already rescaled
# the images (presumably to [0, 1]; confirm from the printed output).
print("Minimum value in training set :",trainX.min())
print("Maximum value in training set :",trainX.max())
print("Minimum value in test set :",testX.min())
print("Maximum value in test set :",testX.max())
#Print shapes post splitting
print("X train shape :" ,trainX.shape)
print("Y train shape :" ,trainY.shape)
print("X test shape :" ,testX.shape)
print("Y test shape :" ,testY.shape)
#print first 5 values in test target set
testY[:5]
# Class balance of the train/test label splits (bare notebook-cell outputs).
# FIX: the top-level pd.value_counts(...) helper is deprecated and removed in
# pandas 2.x; construct a Series and call its value_counts() method instead.
pd.Series(trainY).value_counts()
pd.Series(testY).value_counts()
# Show ten training images (indices 400-409), one figure each.
for idx in range(400, 410):
    plt.imshow(trainX[idx], cmap='gray')
    plt.show()
# Print the labels of the same ten training samples.
for idx in range(400, 410):
    print("Label: ", trainY[idx])
# Pair each label with its image for a quick visual sanity check
# (indices 200-209).
for idx in range(200, 210):
    print("Label :", trainY[idx])
    plt.imshow(trainX[idx], cmap='gray')
    plt.show()
# Demonstrate OpenCV image filters on sample images.
# Blur: each output pixel is the mean of its 9x9 neighbourhood.
sample_img = trainX[100]
blurred = cv2.blur(sample_img, (9, 9))
plt.subplot(121)
plt.imshow(sample_img)
plt.title('Original')
plt.xticks([])
plt.yticks([])
plt.subplot(122)
plt.imshow(blurred)
plt.title('Blurred')
plt.xticks([])
plt.yticks([])
plt.show()
# Edge detection (vertical): a Prewitt-style kernel whose columns go
# -1, 0, +1 responds to left-right intensity changes, i.e. vertical edges.
# flowerImg2 is reused by the later filter demos, so the name is kept.
vertical_kernel = np.array([[-1, 0, 1],
                            [-1, 0, 1],
                            [-1, 0, 1]])
flowerImg2 = trainX[200]
vertical_edges = cv2.filter2D(flowerImg2, -1, vertical_kernel)
plt.subplot(121)
plt.imshow(flowerImg2)
plt.title('Original')
plt.xticks([])
plt.yticks([])
plt.subplot(122)
plt.imshow(vertical_edges)
plt.title('Picture with vertical edges')
plt.xticks([])
plt.yticks([])
plt.show()
#Edge Detection: Horizontal
# FIX: the original kernel [[-1,-1,1],[0,0,0],[-1,1,1]] is not a
# horizontal-edge detector. The horizontal counterpart of the vertical
# Prewitt kernel used above is its transpose: constant rows going
# -1, 0, +1 top to bottom, responding to top-bottom intensity changes.
edge_matrix_hor = np.array([[-1, -1, -1],
                            [ 0,  0,  0],
                            [ 1,  1,  1]])
horizontalEdge = cv2.filter2D(flowerImg2, -1, edge_matrix_hor)
plt.subplot(121),plt.imshow(flowerImg2),plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(horizontalEdge),plt.title('Picture with horizontal edges')
plt.xticks([]), plt.yticks([])
plt.show()
# Emboss-style effect using a custom 4x4 kernel.
# NOTE(review): this kernel is non-standard (typical emboss kernels are 3x3
# and antisymmetric about the diagonal) — it produces *an* effect, but
# confirm it is the intended one.
emboss_kernel = np.array([[-1, -1, 0, 0],
                          [ 1,  1, 1, 1],
                          [ 1,  2, 2, 2],
                          [ 0,  0, 1, 1]])
embossed = cv2.filter2D(flowerImg2, -1, emboss_kernel)
plt.subplot(121)
plt.imshow(flowerImg2)
plt.title('Original')
plt.xticks([])
plt.yticks([])
plt.subplot(122)
plt.imshow(embossed)
plt.title('Embossed picture')
plt.xticks([])
plt.yticks([])
plt.show()
# Smoothing: a normalised 5x5 box kernel averages each 25-pixel
# neighbourhood (equivalent to cv2.blur with a 5x5 window).
box_kernel = np.ones((5, 5), np.float32) / 25
smoothed = cv2.filter2D(flowerImg2, -1, box_kernel)
plt.subplot(121)
plt.imshow(flowerImg2)
plt.title('Original')
plt.xticks([])
plt.yticks([])
plt.subplot(122)
plt.imshow(smoothed)
plt.title('Averaging')
plt.xticks([])
plt.yticks([])
plt.show()
• Supervised learning algorithms for training
• Use neural networks for training
• Use CNNs for training
• Use various CNNs with transfer-learning models for training
# Flatten each 224x224x3 image into a single 150528-feature row vector.
# GENERALIZED: derive the dimensions from the data instead of hard-coding
# 1360x150528, so the cell also works if the dataset size changes.
X_shaped = X.reshape(X.shape[0], -1)
#Standard scaling and dimensionality reduction
# Zero-mean/unit-variance per feature, then project onto the top 50
# principal components to make the SVM tractable.
scale = StandardScaler()
flowersScale = scale.fit_transform(X_shaped)
pca = PCA(n_components=50)
flowers_pca = pca.fit_transform(flowersScale)
print('PCA shape for train ds is: ', flowers_pca.shape)
# NOTE(review): this is a fresh random split, independent of the earlier
# trainX/testX split — the two partitions will not coincide.
(trainX_scale, testX_scale, trainY_scale, testY_scale) = train_test_split(flowers_pca, Y, test_size=0.25)
# RBF-kernel SVM on the PCA-reduced features, with train/test accuracy
# and a per-class report.
from sklearn.metrics import classification_report

svm_clf = SVC(C=0.90, decision_function_shape='ovo', kernel='rbf')
svm_clf.fit(trainX_scale, trainY_scale)
train_predictions = svm_clf.predict(trainX_scale)
test_predictions = svm_clf.predict(testX_scale)
print('SVC train Accuracy: %.3f' % accuracy_score(trainY_scale, train_predictions))
print('SVC test Accuracy: %.3f' % accuracy_score(testY_scale, test_predictions))
print("Classification report : \n", classification_report(testY_scale, test_predictions))
#SVC with Hyperparameter tuning
# Grid-search C over a wide range for the RBF kernel (default CV folds).
tuned_parameters = [{'kernel': ['rbf'],'C': [0.1,0.5,0.8,0.9,1, 10, 100, 1000]}]
clf = GridSearchCV(SVC(), tuned_parameters, verbose = 3)
clf.fit(trainX_scale,trainY_scale)
# Best mean CV score and its parameters (bare notebook-cell outputs).
clf.best_score_
clf.best_params_
# Full per-candidate CV results.
results= clf.cv_results_
results
# Re-fit with the selected C=10 and estimate generalisation via 15-fold CV.
linearsvc=SVC(C=10,decision_function_shape='ovo',kernel='rbf')
scores=cross_val_score(linearsvc,trainX_scale,trainY_scale,cv=15)
scores
np.mean(scores)
#Neural network
# Flatten the images for a dense network and one-hot encode the 17 classes.
# GENERALIZED: derive the row counts and the 224*224*3 = 150528 feature size
# from the arrays themselves rather than hard-coding 1020/340 rows, so the
# cell survives a different train/test split ratio.
trainX_reshape = trainX.reshape(trainX.shape[0], -1)
testX_reshaped = testX.reshape(testX.shape[0], -1)
trainY_encode = tensorflow.keras.utils.to_categorical(trainY, num_classes=17)
testY_encode = tensorflow.keras.utils.to_categorical(testY, num_classes=17)
def build_NNmodel(hp):
    """Build a tunable four-hidden-layer MLP for Keras Tuner.

    Each hidden layer's width (32-512, step 32) and weight initialiser are
    hyperparameters; the softmax output layer additionally tunes an L2
    regulariser, and the SGD learning rate is tuned at compile time.

    Args:
        hp: KerasTuner HyperParameters object supplied by the tuner.

    Returns:
        A compiled tf.keras Sequential model.
    """
    init_options = ['glorot_uniform', 'glorot_normal', 'he_uniform', 'he_normal']

    model = Sequential()
    # First hidden layer fixes the flattened-image input shape (224*224*3).
    model.add(Dense(
        units=hp.Int('Dense1', min_value=32, max_value=512, step=32),
        activation='relu',
        input_shape=(150528,),
        kernel_initializer=hp.Choice('wt_initialiser', values=init_options)))
    # Remaining three hidden layers; hyperparameter names match the
    # original search space exactly so existing tuner results stay valid.
    for units_name, init_name in (('Dense2', 'wt_initialiser1'),
                                  ('Dense3', 'wt_initialiser2'),
                                  ('Dense4', 'wt_initialiser3')):
        model.add(Dense(
            units=hp.Int(units_name, min_value=32, max_value=512, step=32),
            activation='relu',
            kernel_initializer=hp.Choice(init_name, values=init_options)))
    # Softmax head over the 17 flower classes, with a tunable L2 penalty.
    model.add(Dense(
        17,
        activation='softmax',
        kernel_regularizer=L2(hp.Choice('regulariser8', values=[1E-2, 1E-3, 1E-4, 1E-5])),
        kernel_initializer=hp.Choice('wt_initialiser8', values=init_options)))
    model.compile(
        optimizer=SGD(hp.Choice('learning_rate', values=[1E-2, 1E-3, 1E-4])),
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    return model
# Random search over the MLP hyperparameters.
# FIX: objective was 'acc', but the model is compiled with
# metrics=['accuracy'], so TF2 logs the metric as 'accuracy' — the tuner
# cannot resolve the old 'acc' alias. ('val_accuracy' would be the
# validation-side alternative.)
tuner_search = RandomSearch(build_NNmodel, objective='accuracy', max_trials=5, directory='output65', project_name="Flower")
tuner_search.search(trainX_reshape, trainY_encode, epochs=20, validation_data=(testX_reshaped, testY_encode))
# Continue training the best trial's model for 50 more epochs.
modelFlowerNN = tuner_search.get_best_models(num_models=1)[0]
modelFlowerNN.fit(trainX_reshape, trainY_encode, epochs=50, validation_data=(testX_reshaped, testY_encode))
print("Test set accuracy :", modelFlowerNN.evaluate(testX_reshaped, testY_encode))
# Class index with the highest softmax score per test sample.
prediction = np.argmax(modelFlowerNN.predict(testX_reshaped), axis=-1)
for i in range(5, 10):
    print("Actual Label :", testY_encode[i])
    print("Predicted label: ", prediction[i])
# Augmented data pipeline: random rotations, shifts, shears, zooms and
# horizontal flips, plus 1/255 rescaling; 25% of the images are reserved
# for the validation subset.
train_gen=ImageDataGenerator(
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
fill_mode='nearest',
validation_split=0.25)
# Training subset, streamed straight from the extracted dataset directory
# (class labels are inferred from sub-directory names, one-hot encoded).
train_generator = train_gen.flow_from_directory(
'/content/17flowers/jpg',
target_size=(224, 224),
batch_size=32,
class_mode='categorical',
subset="training")
# Validation subset from the same directory and pipeline.
validation_generator = train_gen.flow_from_directory(
'/content/17flowers/jpg',
target_size=(224, 224),
batch_size=32,
class_mode='categorical',
subset="validation")
# Grab one 32-image batch from each generator for later spot evaluation.
# NOTE(review): these single batches are what evaluate()/predict() use
# below — they are samples, not the full splits.
trainXCNN,trainYCNN= next(train_generator)
testXCCNN,testYCNN =next(validation_generator)
# Checkpoint the best CNN weights seen during training.
# FIX: with metrics=['accuracy'], TF2 logs the validation metric as
# 'val_accuracy'; monitoring the old 'val_acc' name never matches, so the
# callback warned and the "best" model was never actually saved.
checkpointFlower = tensorflow.keras.callbacks.ModelCheckpoint(
    '/content/drive/MyDrive/GL Assignment/Convolutional Neural network/FlowersClassifierCNN.h5',  # where to save the model
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1)
#CNN for training
# Four conv/pool stages (two with batch norm) followed by a dropout-
# regularised dense head ending in a 17-way softmax.
modelCNN = Sequential([
    Conv2D(filters=128, kernel_size=3, activation="relu",
           input_shape=(224, 224, 3), kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Conv2D(filters=128, kernel_size=3, activation="relu", kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Conv2D(filters=64, kernel_size=3, activation="relu", kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=3, activation="relu", kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.4),
    Flatten(),
    Dense(256, activation="relu"),
    BatchNormalization(),
    Dropout(rate=0.4),
    Dense(128, activation="relu"),
    BatchNormalization(),
    Dropout(rate=0.4),
    Dense(64, activation="relu"),
    BatchNormalization(),
    Dropout(rate=0.3),
    Dense(17, activation="softmax"),
])
# Resume from the previously checkpointed weights, then compile.
modelCNN.load_weights('/content/drive/MyDrive/GL Assignment/Convolutional Neural network/FlowersClassifierCNN.h5')
modelCNN.compile(loss="categorical_crossentropy", metrics=["accuracy"], optimizer='adam')
# Fit the model on the augmented generators.
# FIX: the generators use batch_size=32, but the step counts were computed
# with //16, double-counting each epoch; len(generator) is the exact number
# of batches per epoch and stays correct if the batch size changes.
historyFlowerCNN = modelCNN.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    callbacks=[checkpointFlower],
    epochs=10,
    verbose=1)
# Spot evaluation on the single cached batches drawn earlier.
modelCNN.evaluate(testXCCNN, testYCNN)
modelCNN.evaluate(trainXCNN, trainYCNN)
modelCNN.save("/content/drive/MyDrive/GL Assignment/Convolutional Neural network/FlowersClassifierCNN.h5")
# Predicted class index per image in the cached validation batch.
predictFlowerCNN = np.argmax(modelCNN.predict(testXCCNN), axis=-1)
for i in range(20, 30):
    print("Predicted value: ", predictFlowerCNN[i])
    # NOTE(review): testYCNN is one-hot (class_mode='categorical'), so the
    # "actual" line prints a vector, not a class index.
    print("Actual value: ", testYCNN[i])
    plt.imshow(testXCCNN[i])
    plt.show()
#Initialise and download VGG19 model weights excluding the fully connected layer.
vgg_model = VGG19(include_top = False, weights = 'imagenet', input_shape = (224,224,3))
%cd /content/drive/MyDrive/GL Assignment/Convolutional Neural network
checkpointFlowerVGG = tensorflow.keras.callbacks.ModelCheckpoint('/content/drive/MyDrive/GL Assignment/Convolutional Neural network/FlowersClassifierVGG.h5', #where to save the model
save_best_only=True,
monitor='val_acc',
mode='max',
verbose = 1)
# Transfer learning: freeze the pretrained VGG19 convolutional base, copy
# its layers into a fresh Sequential model, and append a trainable dense
# classifier head for the 17 flower classes.
modelVGG= Sequential()
# Freezing before copying: the layers keep trainable=False inside modelVGG.
vgg_model.trainable = False
for layer in vgg_model.layers:
    modelVGG.add(layer)
modelVGG.add(Flatten())
modelVGG.add(BatchNormalization())
modelVGG.add(Dropout(0.2))
modelVGG.add(Dense(128, activation='relu',kernel_initializer='he_uniform'))
modelVGG.add(Dropout(0.2))
modelVGG.add(Dense(128, activation='relu',kernel_initializer='he_uniform'))
modelVGG.add(Dropout(0.2))
modelVGG.add(BatchNormalization())
modelVGG.add(Dense(40, activation='relu',kernel_initializer='he_uniform'))
modelVGG.add(Dense(17, activation='softmax'))
#learn_rate=0.001
#sgd=SGD(lr=learn_rate,momentum=.9)
# Resume from the previously saved checkpoint, then compile and fine-tune
# the head on the augmented generators.
# NOTE(review): load_weights requires that the checkpoint file already
# exists and matches this architecture — confirm on a fresh runtime.
modelVGG.load_weights('/content/drive/MyDrive/GL Assignment/Convolutional Neural network/FlowersClassifierVGG.h5')
modelVGG.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
historyVGG= modelVGG.fit(train_generator,epochs=20,validation_data=(validation_generator),callbacks = [checkpointFlowerVGG],verbose=1)
#save the model for future use
modelVGG.save('/content/drive/MyDrive/GL Assignment/Convolutional Neural network/FlowersClassifierVGG.h5')
# Spot evaluation on the cached single batches, then reload the saved model.
modelVGG.evaluate(trainXCNN,trainYCNN)
modelVGG.evaluate(testXCCNN,testYCNN)
modelVGG = load_model('/content/drive/MyDrive/GL Assignment/Convolutional Neural network/FlowersClassifierVGG.h5')
#Predictions
# Predicted class index per image in the cached validation batch.
flowerPredictions = np.argmax(modelVGG.predict(testXCCNN), axis=-1)
#Compare actual values and predicted values along with the images
for sample in range(23, 29):
    print("Predicted value: ", flowerPredictions[sample])
    print("Actual value: ", testYCNN[sample])
    plt.imshow(testXCCNN[sample])
    plt.show()
# 5 out of 6 of our predictions are correct
Accuracies for the models are as follows:
Out of the four models, the CNN model has performed the best. We used the VGG model for transfer learning so that the pre-trained weights could be utilised to improve accuracy in less time. Transfer learning with VGG19 has given an accuracy of 81.25% on our Oxford flower dataset of 17 categories.
# Minimal Tkinter GUI: the user types a file name, imports the image, and
# asks for a class prediction (buttons/callbacks are wired up below).
# NOTE(review): Tk needs a display server; this will not run on a headless
# Colab runtime — confirm the execution environment.
tkWind =tk.Tk()
tkWind.title('CLASSIFIER GUI-Great Learning')
#canvas = Canvas(tkWind, width = 300, height = 300)
#canvas.pack()
#Step 1 :File Name
fileNamelbl = ttk.Label(tkWind,text='Step 1: File Name')
fileNamelbl.grid(row=0, column=0,sticky=tk.W)
# Entry bound to a StringVar so the callbacks can read the typed file name.
name_var=tk.StringVar()
name_entry =ttk.Entry(tkWind,textvariable=name_var)
name_entry.grid(row=0, column=1)
def import_data():
    """Read the image named in the GUI entry from the Drive model folder.

    Returns:
        The image as a numpy array in OpenCV's BGR layout, or None if the
        file could not be read (cv2.imread returns None on failure).
    """
    base_path = '/content/drive/MyDrive/GL Assignment/Convolutional Neural network/'
    fileName = name_var.get()
    path = base_path + fileName
    # FIX: the original passed the bare `fileName` to cv2.imread even though
    # the full `path` had just been built, so files in the Drive folder were
    # only found if the working directory happened to match.
    flowerImg = cv2.imread(path)
    return flowerImg

#Import data button
importButton = ttk.Button(tkWind, text='Import data', command=import_data)
importButton.grid(row=0, column=2)
def predict_class():
    """Classify the imported image with the saved VGG model and show the label.

    Loads the checkpointed model, reads and resizes the user's image to
    224x224, predicts its class, and writes the human-readable label into
    a new Entry widget in the GUI.
    """
    model2 = load_model('/content/drive/MyDrive/GL Assignment/Convolutional Neural network/FlowersClassifierVGG.h5')
    flowerImage = import_data()
    resized = cv2.resize(flowerImage, (224, 224), interpolation=cv2.INTER_AREA)
    flowerImage = np.reshape(resized, [1, 224, 224, 3])
    # NOTE(review): training data was rescaled by 1/255; this raw image is
    # not — confirm whether a /255.0 is needed here for correct predictions.
    # FIX: Sequential.predict_classes() was removed in TF 2.6; take the
    # argmax of the softmax output instead.
    predictFlower = np.argmax(model2.predict(flowerImage), axis=-1)
    # Invert the generator's {class_name: index} mapping to look up the name.
    flowerLabels = train_generator.class_indices
    flowerLabels = dict((v, k) for k, v in flowerLabels.items())
    prediction = flowerLabels[predictFlower[0]]
    predictString_lbl = ttk.Entry(tkWind)
    predictString_lbl.grid(row=1, column=3)
    predictString_lbl.insert(1, str(prediction))
    print(predictFlower)

predictImage = ttk.Button(tkWind, text='Predict', command=predict_class)
predictImage.grid(row=1, column=2)
tkWind.mainloop()
Some points one can follow after the model is deployed to a production environment:
The nature of data keeps changing all the time, so it needs to be continuously monitored or gathered in real time. Incoming data and its distribution can be verified to ensure they are similar to the distribution of the training data with which the model was trained. Data changes can be tracked with a few statistics such as mean, variance, standard deviation, and correlation.
To validate the model's performance in real time, labels/predictions can be collected over a specific timeframe and performance metrics calculated. These can be compared against the metric values obtained during the training phase.
If either of the above two aspects varies significantly from the original setup, the model can be retrained on chunks of the latest incoming data. Model performance needs to be continuously tracked, and the model retrained accordingly as per business requirements.